# -*- coding: utf-8 -*-
# @Time    : 2024/1/9 20:38
# @Author  : micah
# @File    : 10.使用xpath完成豆瓣电影评论的获取.py
# @Software: PyCharm


import requests
from lxml import etree


# Comments page of Douban movie subject 1292052.
url = 'https://movie.douban.com/subject/1292052/comments?status=P'

# Send a browser-like User-Agent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}

# A timeout keeps a stalled connection from hanging the script forever, and
# raise_for_status() makes an HTTP error fail loudly instead of letting the
# script parse an error page and silently print nothing.
http_response = requests.get(url, headers=headers, timeout=10)
http_response.raise_for_status()
response = http_response.text
tree = etree.HTML(response)

# Grab the parent element of every comment first, then extract the fields
# relative to each parent so time and text stay paired per comment.
element_list = tree.xpath('//div[@class="comment"]')

for temp in element_list:
    # normalize-space() strips surrounding whitespace from the class attribute,
    # so this matches both "comment-time " (trailing space) and "comment-time".
    update_time = temp.xpath(
        './h3/span[@class="comment-info"]'
        '/span[normalize-space(@class)="comment-time"]/@title'
    )
    content = temp.xpath('.//span[@class="short"]/text()')
    # Each xpath() call returns a list of matches; the lists are printed as-is.
    print(update_time, content)
